## Authors: Savannah Bergquist and Kate Lofgren
## Date: March 17, 2016
## Gov 2001, generate Figures and Tables for Replication Study (Version 3)

## Set up workspace
  rm(list=ls())
  library(foreign); library(ggplot2); library(grid); library(stargazer); 
  library(xtable); library(lme4); library(sandwich); library(lmtest)
  set.seed(02138)
  setwd("~/Dropbox/Harvard/GOV 2001/replication_paper/")
  #setwd("~/Dropbox/replication_paper/")
  
###############
## Functions ##
###############
## Cluster CI Function ##
  # write function to calc clustered se's
  get_CL_vcov<-function(model, cluster){
    require(sandwich, quietly = TRUE)
    require(lmtest, quietly = TRUE)
    
    #calculate degree of freedom adjustment
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- model$rank
    dfc <- (M/(M-1))*((N-1)/(N-K))
    
    #calculate the uj's
    uj  <- apply(estfun(model),2, function(x) tapply(x, cluster, sum))
    
    #use sandwich to get the var-covar matrix
    vcovCL <- dfc*sandwich(model, meat=crossprod(uj)/N)
    return(vcovCL)
  }
  
## graphing function ##
  lay_out = function(...) {    
    x <- list(...)
    n <- max(sapply(x, function(x) max(x[[2]])))
    p <- max(sapply(x, function(x) max(x[[3]])))
    grid::pushViewport(grid::viewport(layout = grid::grid.layout(n, p)))    
    
    for (i in seq_len(length(x))) {
      print(x[[i]][[1]], vp = grid::viewport(layout.pos.row = x[[i]][[2]], 
                                             layout.pos.col = x[[i]][[3]]))
    }
  } 
  
#############################
## General Data Processing ##
#############################
    # bring in the data
    data <- read.dta("./data_and_code/Final_plan_data_12.dta")
    data.original <- data
    
    # Restrictions based on Table 1 in paper
    # no multistate
    data <- data[data$multistate==0,]
    # no rating area 26
    data <- data[data$ra!=26,]
    # drop if plan type is EPO
    data <- data[data$plantype!="EPO",]
    # drop two insurers that only have 1 plan after these restrictions
    data <- data[data$insurer!="CommunityFirst" & data$insurer!="Sendero Health Plans",]
    
    # generate an HMO indicator
    data$hmo <- 0 
    data$hmo[data$plantype=="HMO"] <- 1
    
    # generate a no deductible indicator
    data$no.d <- 0
    data$no.d[data$deductible==0] <- 1
    
    # create logged variables for income and deductible, max out of pocket, premium
    data$ln_pc_income <- log(data$pc_income_2013)
    data$ln_deductible <- log(data$deductible)
    data$ln_deductible[data$no.d==1] <- 0
    data$ln_max_oop <- log(data$max_out_pocket)
    data$ln_prem <- log(data$premium)
    
    # generate binary indicators for each insurance plan
    temp <- as.factor(data$insurer)
    temp_out <- data.frame(model.matrix(~temp-1))
    data <- cbind(data,temp_out)
    
    # make a shorter version of insuerer names
    names(data)[24:32] <- c("aetna","ambetter","bcbs","cigna","community_choice","firstcare","humana","molina","scott_white")
    
    # generate cluster variable
    data$fac.network <- as.factor(data$network)
    data$cluster <- as.factor(interaction(data$fac.network,data$ra))
    data$cluster <- as.numeric(data$cluster)
    
    # create binary variables for each rating area (ra)
    ra <- as.factor(data$ra)
    ra_out <- data.frame(model.matrix(~ra-1))
    data <- cbind(data,ra_out)  
    data.all <- data
    data <- data[data$metal== "Silver",] # want to subset to silver for now
    
#################################   
## Figure 1: Exact Replication ##
#################################
  # read in data
  fig1 <- read.dta("./data_and_code/Figure_1_data_12.dta")
  
  # recreate original graph
  fig1$place <- as.factor(fig1$rabypop_new)
  
  # plot
pdf("./rep_figs/figure1_same_style_notitle.pdf",height=12,width=14)
 
  # set up plot parameters (margin widths, ability to plot outside of plot)
  par(mai=c(1.5,2,1,1),xpd=F)
 
   # make plot with different symbols/colors for different plan types
  plot(fig1$frac,fig1$place,type="n",ylab="",xlab="Share of Discharges",yaxt="n",xlim=c(0,1),cex.axis=1.5,las=1,cex.lab=1.4,bty="n")
  abline(h=c(1,2,3,4,5,6),col="gray45")
  points(fig1$frac[fig1$group=="BCBS (HMO)"],fig1$place[fig1$group=="BCBS (HMO)"],pch=5,cex=3,col="red",lwd=4)
  points(fig1$frac[fig1$group=="BCBS (PPO)"],fig1$place[fig1$group=="BCBS (PPO)"],pch=1,cex=3,col="black",lwd=4)
  points(fig1$frac[fig1$group=="Other"],fig1$place[fig1$group=="Other"],pch=2,cex=3,col="forestgreen",lwd=4)
  
  # customize the graph
  axis(2,at=unique(fig1$place),labels=c("Dallas","Houston","San Antonio","Victoria","San Angelo","Texarkana"),las=2,cex=1.8)
  # title("Figure 1: Breath of Networks Offered in Largest and Smallest Texas Ratings Areas",cex.main=1.7)
  par(xpd=TRUE)
  legend(0.2,6.8,legend=c("Other","BCBC (HMO)","BCBS (PPO)"),pch=c(2,5,1),col=c("forestgreen","red","black"),bty="n",ncol=3,cex=1.4,lty=c(0,0,0),lwd=4)
  
dev.off()
 
###################################################
## Regression for Table 1 (just on silver plans) ##
###################################################
# run regressions to replicate Table 1
# Regression 0 (not reported)
  m0 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + bcbs + 
                      ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                      ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                      ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
# Regression 1
  m1 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
  
  # adjust se's for clustering
  m1.vcovCL<-get_CL_vcov(m1, data$cluster)
  # extract se's on diagonal
  se.1 <- sqrt(diag(m1.vcovCL))
  
# Regression 2
  m2 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  m2.vcovCL<-get_CL_vcov(m2, data$cluster[data$bcbs==0])
  se.2 <- sqrt(diag(m2.vcovCL))
  
# Regression 3
  m3 <- lm(ln_prem ~ frac + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m3.vcovCL<-get_CL_vcov(m3, data$cluster[data$bcbs==1])
  se.3 <- sqrt(diag(m3.vcovCL))
  
# Regression 4
  m4 <- lm(ln_prem ~ frac + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m4.vcovCL<-get_CL_vcov(m4, data$cluster[data$bcbs==1])
  se.4 <- sqrt(diag(m4.vcovCL))

###################
## Make Table 1 ##
##################
# Make a table to report the results
  table <- stargazer(m1,m2,m3,m4, 
                     se=list(se.1, se.2, se.3, se.4, NULL, NULL, NULL, NULL),
                     omit=c("aetna", "ambetter", "cigna", "community_choice", "firstcare", "humana",
                            "molina", "scott_white", "ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                            "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                            "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                     column.labels=c("All plans", "Non-BCBS plans", "BCBS plans"),
                     column.separate=c(1,1,2),
                     dep.var.labels.include=F, dep.var.caption="",
                     covariate.labels = c("Discharge ratio", "Deductible is 0", "ln(deductible$| >$0)",
                                          "ln(maximum out-of-pocket expense)", "HMO"),
                     omit.stat=c("adj.rsq", "f", "ser"), 
                     add.lines=list(c("Insurer fixed effects", "Yes", "Yes", "N/A", "N/A")), 
                     column.sep.width="3pt", notes= "Significance", digits=3,
                     title="Relationship between Network Breadth and Plan Premiums")
  

#########################################
# Make New Plot to Communicate Results ##
#########################################
pdf("./rep_figs/figure_new_data_scatter_v2_notitle.pdf",height=12,width=14)
  
  par(mai=c(1.5,1.5,.75,2.5),xpd=F)
  plot(data$frac[data$bcbs==1 & data$hmo==0],data$premium[data$bcbs & data$hmo==0],
       pch=1,col="purple",xlim=c(0,1),ylim=c(150,max(data$premium)+25),cex=2,
       lwd=4,ylab="Premium in USD",xlab="Discharge Percentage",cex.lab=1.4,las=1,cex.axis=1.3,bty="n")
  points(data$frac[data$hmo==1 & data$bcbs==1],data$premium[data$hmo==1 & data$bcbs==1],pch=5,col="purple3",lwd=4,cex=2)
  points(data$frac[data$bcbs==0],data$premium[data$bcbs==0],pch=2,col="gray50",cex=2,lwd=4)
  model <- lm(premium ~ frac, data=data)
  abline(model,lwd=3,col="gray50")
  model <- lm(premium ~ frac, data=data[data$bcbs==1,])
  abline(model,lwd=3,col="purple")
  par(xpd=T)
  #title("New Figure: BCBS Influence on Overall Premium Discharge Correlation",cex.main=1.3)
  legend(.1,390,legend=c("All Plans","BCBS (HMO + PPO)"),lty=c(1,1),col=c("gray50","purple3"),bty="n",ncol=2,cex=1.4,lwd=4)

dev.off()

# make a group variable
  data$group <- ""
  data$group[data$hmo==0 & data$bcbs==1] <- "BCBS PPO"
  data$group[data$hmo==1 & data$bcbs==1] <- "BCBS HMO"
  data$group[data$bcbs==0] <- "Other"
  data$group <- as.factor(data$group)
  data$group = factor(data$group,levels(data$group)[c(2,1,3)])
  
  data$high <- 0
  data$high[data$premium>mean(data$premium)] <- 1  # cutoff at 237
# Boxplot
pdf("./rep_figs/figure_new_boxplot_v2_notitle.pdf",height=12,width=14)
  
  p <- ggplot(data, aes(factor(bcbs),fill = factor(high),frac))
  p + geom_boxplot() + 
    theme(legend.position = "none",text=element_text(size=20)) +
    labs(list(y="Discharge Percentage",x="")) +
    theme(axis.ticks = element_blank(), axis.text.x = element_blank()) + 
    annotate("text", x = .825, y = -.05, label = "Low Premium \n Non-BCBS") +
    annotate("text", x = 1.23, y = -.05, label = "Hi Premium \n Non-BCBS") +
    annotate("text", x = 1.825, y = -.05, label = "Low Premium \n BCBS") +
    annotate("text", x = 2.23, y = -.05, label = "Hi Premium \n BCBS") 

dev.off()

########################
# Table 4 - Appendix ##
#######################
  # decided not to do -- just summary data stats, not needed

############################################################################# 
# Figure 1 - Appendix - scatter plot of Discharge Frac v. Consumer Surplus ##
#############################################################################
pdf("./rep_figs/figure1_appendix_mod_notitle.pdf",height=12,width=14)  

  par(mai=c(1,2,1,1),xpd=F)
  plot(data$CS,data$frac,type="n",xlab="Consumer Surplus for Network",ylab="Share of Discharges",las=1,cex.axis=1.3,cex.lab=1.4,xlim=c(0,6),bty="n")
  grid(lty=1,col="gray75")
  points(data$CS[data$hmo==0 & data$bcbs==1],data$frac[data$hmo==0 & data$bcbs==1],pch=1,lwd=4,cex=2)
  points(data$CS[data$hmo==1 & data$bcbs==1],data$frac[data$hmo==1 & data$bcbs==1],pch=5,lwd=4,cex=2,col="red")
  points(data$CS[data$bcbs==0],data$frac[data$bcbs==0],pch=2,lwd=4,cex=2,col="forestgreen")
  #title("Appendix Figure 1: Measures of Network Breadth and Expected Utility",cex.main=2)
  par(xpd=TRUE)
  legend(1.2,1.12,legend=c("Other","BCBC (HMO)","BCBS (PPO)"),pch=c(2,5,1),col=c("forestgreen","red","black"),bty="n",ncol=3,cex=1.4,lty=c(0,0,0),lwd=4)
  
dev.off()  
  
## boxplot version
  p <- ggplot(data, aes(factor(group),fill = factor(group),frac))
  p.new <- p + geom_boxplot() +
  theme(legend.position = "none",text=element_text(size=20)) +
    labs(list(y="Discharge Fraction",x="")) +
    theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
    annotate("text", x = 1, y = -.07, label = "BCBS PPO") +
    annotate("text", x = 2, y = -.07, label = "BCBS HMO") +
    annotate("text", x = 3, y = -.07, label = "Other") 
  q <-  ggplot(data, aes(factor(group),fill = factor(group),CS))
  q.new <- q + geom_boxplot() +
  theme(legend.position = "none",text=element_text(size=20)) +
    labs(list(y="Consumer Surplus",x="")) +
    theme(axis.ticks = element_blank(), axis.text.x = element_blank()) +
    annotate("text", x = 1, y = -.07, label = "BCBS PPO") +
    annotate("text", x = 2, y = -.07, label = "BCBS HMO") +
    annotate("text", x = 3, y = -.07, label = "Other") 
  
pdf("./rep_figs/cs_frac_comp_barchar_vertical.pdf",height=11,width=8))
    lay_out(list(p.new, 1, 1),
            list(q.new, 2, 1))
dev.off()

####################################################################
## Table 5 Appendix: same as Table 1 but with CS instead of frac ###
####################################################################
# Regression 1
  cs.m1 <- lm(ln_prem ~ CS + no.d + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)

# adjust se's for clustering
  cs.m1.vcovCL<-get_CL_vcov(cs.m1, data$cluster)

# extract se's on diagonal
  cs.se.1 <- sqrt(diag(cs.m1.vcovCL))

# Regression 2
  cs.m2 <- lm(ln_prem ~ CS + no.d + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  cs.m2.vcovCL<-get_CL_vcov(cs.m2, data$cluster[data$bcbs==0])
  cs.se.2 <- sqrt(diag(cs.m2.vcovCL))

# Regression 3
  cs.m3 <- lm(ln_prem ~ CS + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  cs.m3.vcovCL<-get_CL_vcov(cs.m3, data$cluster[data$bcbs==1])
  cs.se.3 <- sqrt(diag(cs.m3.vcovCL))

# Regression 4
  cs.m4 <- lm(ln_prem ~ CS + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  cs.m4.vcovCL<-get_CL_vcov(cs.m4, data$cluster[data$bcbs==1])
  cs.se.4 <- sqrt(diag(cs.m4.vcovCL))

## make a table
  # Make a table to report the results
  table <- stargazer(m1,m2,m3,m4, 
                     se=list(se.1, se.2, se.3, se.4, NULL, NULL, NULL, NULL),
                     omit=c("aetna", "ambetter", "cigna", "community_choice", "firstcare", "humana",
                            "molina", "scott_white", "ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                            "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                            "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                     column.labels=c("All plans", "Non-BCBS plans", "BCBS plans"),
                     column.separate=c(1,1,2),
                     dep.var.labels.include=F, dep.var.caption="",
                     covariate.labels = c("Consumer Surplus from Network", "Deductible is 0", "ln(deductible$| >$0)",
                                          "ln(maximum OOP)", "HMO"),
                     omit.stat=c("adj.rsq", "f", "ser"), 
                     add.lines=list(c("Insurer fixed effects", "Yes", "Yes", "N/A", "N/A")), 
                     column.sep.width="3pt", notes= "Significance", digits=3,
                     title="Online Appendix Table - Relationship between Expected Utility and Plan Premiums")
  
  
#########################################
## Bronze Plans
#########################################
  data <- data.all
  data <- data[data$metal=="Bronze",]
 
###########################################
## Regression for Bronze Plans - Table 1 ##
###########################################
# Regression 0 (not reported)
  b.m0 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop + bcbs + 
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
# Regression 1
  b.m1 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
  
  # adjust se's for clustering
  b.m1.vcovCL<-get_CL_vcov(b.m1, data$cluster)
  # extract se's on diagonal
  b.se.1 <- sqrt(diag(b.m1.vcovCL))
  
  # Regression 2
  b.m2 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  b.m2.vcovCL<-get_CL_vcov(b.m2, data$cluster[data$bcbs==0])
  b.se.2 <- sqrt(diag(b.m2.vcovCL))
  
  ## I think the issue with the max_oop estimation here is that the non-bcbs plans
  # are entirely grouped into the top two expense categories
  # in the Silver plans, there are some non-bcbs plans in all of the max oop
  # expense categories
  
# Regression 3
  b.m3 <- lm(ln_prem ~ frac + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  b.m3.vcovCL<-get_CL_vcov(b.m3, data$cluster[data$bcbs==1])
  b.se.3 <- sqrt(diag(b.m3.vcovCL))
  
# Regression 4
  b.m4 <- lm(ln_prem ~ frac + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  b.m4.vcovCL<-get_CL_vcov(b.m4, data$cluster[data$bcbs==1])
  b.se.4 <- sqrt(diag(b.m4.vcovCL))
  
###################################
## Make Table 1 for bronze plans ##
###################################
  # Make a table to report the results
  table <- stargazer(m1,m2,m3,m4, 
                     se=list(se.1, se.2, se.3, se.4, NULL, NULL, NULL, NULL),
                     omit=c("aetna", "ambetter", "cigna", "community_choice", "firstcare", "humana",
                            "scott_white", "ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                            "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                            "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                     column.labels=c("All plans", "Non-BCBS plans", "BCBS plans"),
                     column.separate=c(1,1,2),
                     dep.var.labels.include=F, dep.var.caption="",
                     covariate.labels = c("Discharge ratio", "ln(deductible$| >$0)",
                                          "ln(maximum out-of-pocket expense)", "HMO"),
                     omit.stat=c("adj.rsq", "f", "ser"), 
                     add.lines=list(c("Insurer fixed effects", "Yes", "Yes", "N/A", "N/A")), 
                     column.sep.width="3pt", notes= "Significance", digits=3,
                     title="Relationship between Network Breadth and Plan Premiums: Bronze plans")
  
####################################
## GOLD
####################################
  data <- data.all
  data <- data[data$metal=="Gold",]
  
###############################
## Regression for Gold Plans ##
############################### 
# Regression 0 (not reported)
  g.m0 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop + bcbs + 
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
# Regression 1
  g.m1 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
  
  # adjust se's for clustering
  g.m1.vcovCL<-get_CL_vcov(g.m1, data$cluster)
  # extract se's on diagonal
  g.se.1 <- sqrt(diag(g.m1.vcovCL))
  
## CS Model 1
  g.m1.cs <- lm(ln_prem ~ CS + ln_deductible + ln_max_oop + aetna +
               ambetter + cigna + community_choice + firstcare + humana + scott_white +
               ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
               ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
               ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
  # adjust se's for clustering
  g.m1.cs.vcovCL<-get_CL_vcov(g.m1.cs, data$cluster)
  # extract se's on diagonal
  g.se.1.cs <- sqrt(diag(g.m1.cs.vcovCL))
  
# Regression 2
  g.m2 <- lm(ln_prem ~ frac + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  g.m2.vcovCL<-get_CL_vcov(g.m2, data$cluster[data$bcbs==0])
  g.se.2 <- sqrt(diag(g.m2.vcovCL))
  
# Regression 3
  g.m3 <- lm(ln_prem ~ frac + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  g.m3.vcovCL<-get_CL_vcov(g.m3, data$cluster[data$bcbs==1])
  g.se.3 <- sqrt(diag(g.m3.vcovCL))
  
# Regression 4
  g.m4 <- lm(ln_prem ~ frac + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  g.m4.vcovCL<-get_CL_vcov(g.m4, data$cluster[data$bcbs==1])
  g.se.4 <- sqrt(diag(g.m4.vcovCL))
  
##########################################################################
## Figure: comparing trends across plan types (Bronze, Silver, Gold) ##
########################################################################## 
  # bring in the data
  data <- data.all

  ##########################
  # make trends for graph
  model.b <- lm(premium ~ frac, data=data[data$bcbs==1 & data$metal=="Bronze",])
  model.s <- lm(premium ~ frac, data=data[data$bcbs==1 & data$metal=="Silver",])
  model.g <- lm(premium ~ frac, data=data[data$bcbs==1 & data$metal=="Gold",])
  
  # non-models
  model.bn <- lm(premium ~ frac, data=data[data$bcbs==0 & data$metal=="Bronze",])
  model.sn <- lm(premium ~ frac, data=data[data$bcbs==0 & data$metal=="Silver",])
  model.gn <- lm(premium ~ frac, data=data[data$bcbs==0 & data$metal=="Gold",])
  
  ###########################
  ## make the plot
pdf("./rep_figs/figure_all_plans_scatter.pdf",height=8,width=12)
  par(mfrow=c(1,2),mai = c(1.5, 1.5, .9,.8))
  
  ## Non-BCBS
  plot(data$frac,data$premium,
       type="n",xlim=c(0,1),ylim=c(min(data$premium)-25,max(data$premium)+25),
       ylab="Premium in USD",xlab="Discharge Percentage",cex.lab=1.4,las=1,cex.axis=1.3,bty="n")
  
  points(data$frac[data$bcbs==0  & data$metal=="Gold"],data$premium[data$bcbs==0 & data$metal=="Gold"],
         pch=1,col="gold",cex=2,lwd=4) 
  points(data$frac[data$bcbs==0  & data$metal=="Silver"],data$premium[data$bcbs==0 & data$metal=="Silver"],
         pch=1,col="gray55",cex=2,lwd=4) 
  points(data$frac[data$bcbs==0  & data$metal=="Bronze"],data$premium[data$bcbs==0 & data$metal=="Bronze"],
         pch=1,col="orange",cex=2,lwd=4) 
  abline(model.bn,lwd=3, col="orange")
  abline(model.sn,lwd=3, col="gray55")
  abline(model.gn,lwd=3, col="gold")
  text(.56,415, "Non-BCBS plans",cex=2)
  
  ## BCBS
  plot(data$frac,data$premium,
       type="n",xlim=c(0,1),ylim=c(min(data$premium)-25,max(data$premium)+25),
       ylab="Premium in USD",xlab="Discharge Percentage",cex.lab=1.4,las=1,cex.axis=1.3,bty="n")
  
  points(data$frac[data$bcbs==1  & data$metal=="Gold"],data$premium[data$bcbs==1 & data$metal=="Gold"],
         pch=1,col="gold",cex=2,lwd=4) 
  points(data$frac[data$bcbs==1  & data$metal=="Gold"],data$premium[data$bcbs==1 & data$metal=="Gold"],
         pch=1,col="gold",cex=2,lwd=4) 
  points(data$frac[data$bcbs==1  & data$metal=="Silver"],data$premium[data$bcbs==1 & data$metal=="Silver"],
         pch=1,col="gray55",cex=2,lwd=4) 
  points(data$frac[data$bcbs==1  & data$metal=="Bronze"],data$premium[data$bcbs==1 & data$metal=="Bronze"],
         pch=1,col="orange",cex=2,lwd=4) 
  abline(model.b,lwd=3, col="orange")
  abline(model.s,lwd=3, col="gray55")
  abline(model.g,lwd=3, col="gold")
  text(.46,415, "BCBS plans",cex=2)
  
dev.off()
  
#################################
## Model Dependence
#################################
# Restrict this analysis to silver plans
  data <- data.all
  data <- data[data$metal=="Silver",]

###############
## Run models
###############
# model 1 from paper
# punch-line: only changes the intercept
m1 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + aetna +
           ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
           ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
           ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
           ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)

# switch all insurance indcators
  for(vv in c("ambetter","cigna","community_choice","firstcare","humana","molina","scott_white","bcbs","aetna")) {
    new <- paste(vv,"_new",sep="")
    data[[new]] <- data[[vv]] - 1
    data[[new]][data[[new]]<0] <- data[[new]][data[[new]]<0]+2
  }
  
  m1.alt <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + aetna_new +
                 ambetter_new + cigna_new + community_choice_new + firstcare_new + humana_new + molina_new + scott_white_new +
                 ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                 ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                 ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)

# # punch-line: reverses sign of bcbs
#   m2 <-  lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + bcbs +
#               ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
#               ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
#               ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)
#   
#   m2.alt <-  lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + bcbs_new +
#                   ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
#                   ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
#                   ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)


# none in convex hull :( 
output <- summary(whatif(data=m1, cfact=data.frame(frac=data$frac,no.d=data$no.d,ln_deductible=data$ln_deductible,
                                                   ln_max_oop=data$ln_max_oop,aetna=data$aetna_new,ambetter=data$ambetter_new,
                                                   cigna=data$cigna_new, community_choice=data$community_choice_new,
                                                   firstcare=data$firstcare_new, humana=data$humana_new, molina=data$molina_new,
                                                   scott_white=data$scott_white_new, ra1=data$ra1,ra2=data$ra2,ra3=data$ra3,ra4=data$ra4,
                                                   ra5=data$ra5,ra6=data$ra6,ra7=data$ra7,ra8=data$ra8,ra9=data$ra9, 
                                                   ra10=data$ra10,ra11=data$ra11,ra12=data$ra12,ra13=data$ra13,ra14=data$ra14,ra15=data$ra15,
                                                   ra16=data$ra16,ra17=data$ra17, 
                                                   ra18=data$ra18,ra19=data$ra19,ra20=data$ra20,ra21=data$ra21,ra22=data$ra22,ra23=data$ra23,
                                                   ra24=data$ra24,ra25=data$ra25)))

# alternate models looking at premium rather than log premium (more interpretable)
m1.prem <- lm(premium ~ frac + no.d + ln_deductible + ln_max_oop + aetna +
                ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
                ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)

# Regression 2
m2.prem <- lm(premium ~ frac + no.d + ln_deductible + ln_max_oop +
                ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
                ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])

# Regression 3
m3.prem <- lm(premium ~ frac + ln_deductible +
                ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])

# Regression 4
m4.prem <- lm(premium ~ frac + ln_deductible + hmo +
                ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==1,])


# changing dependent variable to premium gives us qualitatively the same results (same sign and significance)

# excluding insurer fixed effects
# Regression 1
  m1 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop +
           ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
           ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
           ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data)

  # adjust se's for clustering
  m1.vcovCL<-get_CL_vcov(m1, data$cluster)
  # extract se's on diagonal
  se.1 <- sqrt(diag(m1.vcovCL))

# Regression 2
  m2 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop +
           ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
           ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
           ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  m2.vcovCL<-get_CL_vcov(m2, data$cluster[data$bcbs==0])
  se.2 <- sqrt(diag(m2.vcovCL))
  
  ### excluding insurer fe makes the estimates noisier, but does not
  # qualitatively change the results at all  
  
######################################
## Make Table 1  - no fixed effects ##
######################################
# Make a table to report the results
  table <- stargazer(m1,m2, 
                   se=list(se.1, se.2, NULL, NULL),
                   omit=c("ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                          "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                          "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                   column.labels=c("All plans", "Non-BCBS plans"),
                   column.separate=c(1,1),
                   dep.var.labels.include=F, dep.var.caption="",
                   covariate.labels = c("Discharge ratio", "Deductible is 0", "ln(deductible$| >$0)",
                                        "ln(maximum out-of-pocket expense)"),
                   omit.stat=c("adj.rsq", "f", "ser"), 
                   add.lines=list(c("Insurer fixed effects", "No", "No", "N/A", "N/A")), 
                   column.sep.width="3pt", notes= "Significance", digits=3,
                   title="Relationship between Network Breadth and Plan Premiums")


#########################################
## OOP Analysis -- do not include in paper
##########################################
# thought was if you have a smaller network, might have to go out of network
  # more often and increase OOP spending
  # but can't really capture that here b/c oop variable is oop limit, not
  # actual oop spending --- do not include in paper
  
data <- data.all
data <- data[data$metal %in% c("Gold","Silver","Bronze"),]

## create dummies for metals
data$gold <- 0
data$silver <- 0 
data$gold[data$metal=="Gold"] <-1
data$silver[data$metal=="Silver"] <- 1 

## run model
data <- data[data$metal=="Silver",]
oop.m1 <- lm(ln_max_oop ~ ln_prem + frac + aetna +
           ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
           ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
           ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
           ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])

oop.m2 <- lm(ln_max_oop ~ ln_prem + frac + ln_deductible + no.d + aetna +
               ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
               ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
               ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
               ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25, weights=popul_by_planmetal, data=data[data$bcbs==0,])

#######################################################################
## See if the equal variance assumption would hold across plan types
#######################################################################
# regression diagnostics!! 
data <- data.all
data <- data[data$metal %in% c("Gold","Silver","Bronze"),]

## create dummies for metals
data$gold <- 0
data$silver <- 0 
data$gold[data$metal=="Gold"] <-1
data$silver[data$metal=="Silver"] <- 1 

var.m1 <- lm(ln_prem ~ ln_max_oop + frac + aetna + ln_deductible + no.d +
               ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
               ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
               ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
               ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data)
# QQ plot shows relatively fat tails, but residual plots look fine in terms of homoscedasticity
  # include all tiers in a single model
  
# check to see if more aggressive transformation helps with tails
var.m2 <- lm(sqrt(premium) ~ ln_max_oop + frac + aetna + ln_deductible + no.d +
               ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
               ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
               ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
               ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data)
# QQ plot looks same/worse -- go with original model for interpretability 
  
## main specifications, including all metal tiers -- bronze is reference level
  # Regression 1
  m1 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data)
  m1.vcovCL<-get_CL_vcov(m1, data$cluster)
  se.1 <- sqrt(diag(m1.vcovCL))
  
  # Regression 2
  m2 <- lm(ln_prem ~ frac + no.d + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  m2.vcovCL<-get_CL_vcov(m2, data$cluster[data$bcbs==0])
  se.2 <- sqrt(diag(m2.vcovCL))
  
  # Regression 3
  m3 <- lm(ln_prem ~ frac + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m3.vcovCL<-get_CL_vcov(m3, data$cluster[data$bcbs==1])
  se.3 <- sqrt(diag(m3.vcovCL))
  
  # Regression 4
  m4 <- lm(ln_prem ~ frac + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m4.vcovCL<-get_CL_vcov(m4, data$cluster[data$bcbs==1])
  se.4 <- sqrt(diag(m4.vcovCL))

###################################
## Table 1 for all metal levels  ##
###################################
table <- stargazer(m1,m2,m3,m4, 
                     se=list(se.1, se.2, se.3, se.4, NULL, NULL, NULL, NULL),
                     omit=c("aetna", "ambetter", "cigna", "community_choice", "firstcare", "humana",
                            "molina", "scott_white", "ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                            "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                            "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                     column.labels=c("All plans", "Non-BCBS plans", "BCBS plans"),
                     column.separate=c(1,1,2),
                     dep.var.labels.include=F, dep.var.caption="",
                     covariate.labels = c("Discharge ratio", "Deductible is 0", "ln(deductible$| >$0)",
                                          "ln(maximum out-of-pocket expense)", "HMO", "Silver", "Gold"),
                     omit.stat=c("adj.rsq", "f", "ser"), 
                     add.lines=list(c("Insurer fixed effects", "Yes", "Yes", "N/A", "N/A")), 
                     column.sep.width="3pt", notes= "Significance", digits=3,
                     title="Relationship between Network Breadth and Plan Premiums: All plan tiers")

  # relationship btw network breadth and premiums stays the same 
  # gain some power, not very much insight -- plan tier has by far the largest impact on premium level
  # looks like deductible being 0 and other plan features become more important for
    # non-bcbs plans -- switching from a deductible to no deductible is associated with
    # about a exp(-.388) = 32% decrease in average premium 
  # holding tier constant, basically same effect
  
##################################  
## all plans, for consumer surplus
##################################  
  ## main specifications, including all metal tiers -- bronze is reference level
  # Regression 1
  m1 <- lm(ln_prem ~ CS + no.d + ln_deductible + ln_max_oop + aetna +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data)
  m1.vcovCL<-get_CL_vcov(m1, data$cluster)
  se.1 <- sqrt(diag(m1.vcovCL))
  
  # Regression 2
  m2 <- lm(ln_prem ~ CS + no.d + ln_deductible + ln_max_oop +
             ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==0,])
  m2.vcovCL<-get_CL_vcov(m2, data$cluster[data$bcbs==0])
  se.2 <- sqrt(diag(m2.vcovCL))
  
  # Regression 3
  m3 <- lm(ln_prem ~ CS + ln_deductible +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m3.vcovCL<-get_CL_vcov(m3, data$cluster[data$bcbs==1])
  se.3 <- sqrt(diag(m3.vcovCL))
  
  # Regression 4
  m4 <- lm(ln_prem ~ CS + ln_deductible + hmo +
             ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
             ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
             ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + silver + gold, weights=popul_by_planmetal, data=data[data$bcbs==1,])
  m4.vcovCL<-get_CL_vcov(m4, data$cluster[data$bcbs==1])
  se.4 <- sqrt(diag(m4.vcovCL))

###################################
## Table 2 for all metal levels  ##
###################################
table <- stargazer(m1,m2,m3,m4, 
                     se=list(se.1, se.2, se.3, se.4, NULL, NULL, NULL, NULL),
                     omit=c("aetna", "ambetter", "cigna", "community_choice", "firstcare", "humana",
                            "molina", "scott_white", "ra1","ra2", "ra3", "ra4", "ra5", "ra6", "ra7", "ra8", "ra8", "ra9",
                            "ra10", "ra11", "ra12", "ra13", "ra14", "ra15", "ra16", "ra17", "ra18",
                            "ra19", "ra20", "ra21", "ra22", "ra23", "ra24", "ra25", "Constant"),
                     column.labels=c("All plans", "Non-BCBS plans", "BCBS plans"),
                     column.separate=c(1,1,2),
                     dep.var.labels.include=F, dep.var.caption="",
                     covariate.labels = c("Consumer Surplus from Network", "Deductible is 0", "ln(deductible$| >$0)",
                                          "ln(maximum out-of-pocket expense)", "HMO", "Silver", "Gold"),
                     omit.stat=c("adj.rsq", "f", "ser"), 
                     add.lines=list(c("Insurer fixed effects", "Yes", "Yes", "N/A", "N/A")), 
                     column.sep.width="3pt", notes= "Significance", digits=3,
                     title="Relationship between Consumer Surplus and Plan Premiums: All plan tiers")
  
## again, same relationships evident but with more power
  #  plan tier determins premium level
  
##################################
## GLM attempt #1
#################################
glm.m1 <- glm(ln_prem ~ ln_max_oop + frac + aetna + ln_deductible + no.d +
                ambetter + cigna + community_choice + firstcare + humana + molina + scott_white +
                ra1 + ra2 + ra3 + ra4 + ra5 + ra6 + ra7 + ra8 + ra9 + 
                ra10 + ra11 + ra12 + ra13 + ra14 + ra15 + ra16 + ra17 + 
                ra18 + ra19 + ra20 + ra21 + ra22 + ra23 + ra24 + ra25 + gold + silver, weights=popul_by_planmetal, 
                data=data, family=gaussian(link=log))

##################################
## Simulation Analysis
#################################
## start of by figuring out hopsital-network-ratings area observation space
data <- data.all
data <- data[data$metal=="Silver",] #251 obs at this point

## format fraction of total into a numeric
data$ftot <- as.numeric(gsub("%","",data$fractionoftotal))
data$ftot <- data$ftot/100  

## make new dataset
size <- 1000
sim <- matrix(NA,nrow=size,ncol=20)
  
## can then figure out which hospital-ra observations need to appear
  # more than once for networks... but need to simulate which networks
  # a hospital would be in. Could do based on frac, but then this
  # basically is doing what they use these to predict. Not a lot of point
  # in that, becuase then we are just modeling what we just simulated.

#######################################
## RA Analysis
#######################################
data <- data.all
data <- data[data$metal %in% c("Gold","Silver","Bronze"),]

## Determine where insurer's are located
temp <- data[,c("namera","ra","insurer","population")]
n <- as.data.frame(table(temp$ra))
names(n) <- c("ra","n.insurer")
temp <- merge(temp,n,by=c("ra"))
temp2 <- temp
temp2$insurer <- NULL
temp2 <- unique(temp2)
temp2.pop <- temp2[order(-temp2$population),]
temp2.n <- temp2[order(-temp2$n.insurer),]
table.temp <- as.data.frame(cbind(temp2.pop[,2],temp2.pop[,3],temp2.n[,2],temp2.n[,4]))
names(table.temp) <- c("ra_pop","pop","ra_insurer","n.insurer")
write.csv(table.temp,file="./rep_figs/ra_table.csv",row.names=F)
